Skip to content

Commit 72ec555

Browse files
authored
Add ChatDatabricks() (#82)
* Close #73: Add ChatDatabricks() * Update changelog * Set secrets as env vars * Use tenacity to retry flaky tests * Tweak docstring
1 parent 72be129 commit 72ec555

File tree

12 files changed

+260
-52
lines changed

12 files changed

+260
-52
lines changed

.github/workflows/test.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ jobs:
2929
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
3030
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
3131
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
32+
DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
33+
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
3234
# Free tier of Google is rate limited, so we only test on 3.12
3335
TEST_GOOGLE: ${{ matrix.config.test_google }}
3436
# Free tier of Azure is rate limited, so we only test on 3.12

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111

1212
### New features
1313

14+
* Added `ChatDatabricks()`, for chatting with Databricks' [foundation models](https://docs.databricks.com/aws/en/machine-learning/model-serving/score-foundation-models). (#82)
1415
* `.stream()` and `.stream_async()` gain a `content` argument. Set this to `"all"` to include `ContentToolRequest` and `ContentToolResponse` instances in the stream. (#75)
1516
* `ContentToolRequest` and `ContentToolResponse` are now exported to `chatlas` namespace. (#75)
1617
* `ContentToolRequest` and `ContentToolResponse` now have `.tagify()` methods, making it so they can render automatically in a Shiny chatbot. (#75)

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ It also supports the following enterprise cloud providers:
4747

4848
* AWS Bedrock: [`ChatBedrockAnthropic()`](https://posit-dev.github.io/chatlas/reference/ChatBedrockAnthropic.html).
4949
* Azure OpenAI: [`ChatAzureOpenAI()`](https://posit-dev.github.io/chatlas/reference/ChatAzureOpenAI.html).
50+
* Databricks: [`ChatDatabricks()`](https://posit-dev.github.io/chatlas/reference/ChatDatabricks.html).
5051
* Snowflake Cortex: [`ChatSnowflake()`](https://posit-dev.github.io/chatlas/reference/ChatSnowflake.html).
5152
* Vertex AI: [`ChatVertex()`](https://posit-dev.github.io/chatlas/reference/ChatVertex.html).
5253

chatlas/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from ._content import ContentToolRequest, ContentToolResult
66
from ._content_image import content_image_file, content_image_plot, content_image_url
77
from ._content_pdf import content_pdf_file, content_pdf_url
8+
from ._databricks import ChatDatabricks
89
from ._github import ChatGithub
910
from ._google import ChatGoogle, ChatVertex
1011
from ._groq import ChatGroq
@@ -27,6 +28,7 @@
2728
"ChatAnthropic",
2829
"ChatAuto",
2930
"ChatBedrockAnthropic",
31+
"ChatDatabricks",
3032
"ChatGithub",
3133
"ChatGoogle",
3234
"ChatGroq",

chatlas/_auto.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from ._anthropic import ChatAnthropic, ChatBedrockAnthropic
88
from ._chat import Chat
9+
from ._databricks import ChatDatabricks
910
from ._github import ChatGithub
1011
from ._google import ChatGoogle, ChatVertex
1112
from ._groq import ChatGroq
@@ -18,6 +19,7 @@
1819
AutoProviders = Literal[
1920
"anthropic",
2021
"bedrock-anthropic",
22+
"databricks",
2123
"github",
2224
"google",
2325
"groq",
@@ -32,6 +34,7 @@
3234
_provider_chat_model_map: dict[AutoProviders, Callable[..., Chat]] = {
3335
"anthropic": ChatAnthropic,
3436
"bedrock-anthropic": ChatBedrockAnthropic,
37+
"databricks": ChatDatabricks,
3538
"github": ChatGithub,
3639
"google": ChatGoogle,
3740
"groq": ChatGroq,

chatlas/_databricks.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING, Optional
4+
5+
from ._chat import Chat
6+
from ._logging import log_model_default
7+
from ._openai import OpenAIProvider
8+
from ._turn import Turn, normalize_turns
9+
10+
if TYPE_CHECKING:
11+
from databricks.sdk import WorkspaceClient
12+
13+
from ._openai import ChatCompletion
14+
from .types.openai import SubmitInputArgs
15+
16+
17+
def ChatDatabricks(
18+
*,
19+
system_prompt: Optional[str] = None,
20+
model: Optional[str] = None,
21+
turns: Optional[list[Turn]] = None,
22+
workspace_client: Optional["WorkspaceClient"] = None,
23+
) -> Chat["SubmitInputArgs", ChatCompletion]:
24+
"""
25+
Chat with a model hosted on Databricks.
26+
27+
Databricks provides out-of-the-box access to a number of [foundation
28+
models](https://docs.databricks.com/en/machine-learning/model-serving/score-foundation-models.html)
29+
and can also serve as a gateway for external models hosted by a third party.
30+
31+
Prerequisites
32+
--------------
33+
34+
::: {.callout-note}
35+
## Python requirements
36+
37+
`ChatDatabricks` requires the `databricks-sdk` package: `pip install
38+
"chatlas[databricks]"`.
39+
:::
40+
41+
::: {.callout-note}
42+
## Authentication
43+
44+
`chatlas` delegates to the `databricks-sdk` package for authentication with
45+
Databricks. As such, you can use any of the authentication methods discussed
46+
here:
47+
48+
https://docs.databricks.com/aws/en/dev-tools/sdk-python#authentication
49+
50+
    Note that the Python-specific article points to this language-agnostic "unified"
51+
approach to authentication:
52+
53+
https://docs.databricks.com/aws/en/dev-tools/auth/unified-auth
54+
55+
There, you'll find all the options listed, but a simple approach that
56+
generally works well is to set the following environment variables:
57+
58+
* `DATABRICKS_HOST`: The Databricks host URL for either the Databricks
59+
workspace endpoint or the Databricks accounts endpoint.
60+
* `DATABRICKS_TOKEN`: The Databricks personal access token.
61+
:::
62+
63+
Parameters
64+
----------
65+
system_prompt
66+
A system prompt to set the behavior of the assistant.
67+
model
68+
The model to use for the chat. The default, None, will pick a reasonable
69+
default, and warn you about it. We strongly recommend explicitly
70+
choosing a model for all but the most casual use.
71+
turns
72+
A list of turns to start the chat with (i.e., continuing a previous
73+
conversation). If not provided, the conversation begins from scratch. Do
74+
not provide non-`None` values for both `turns` and `system_prompt`. Each
75+
message in the list should be a dictionary with at least `role` (usually
76+
`system`, `user`, or `assistant`, but `tool` is also possible). Normally
77+
there is also a `content` field, which is a string.
78+
workspace_client
79+
A `databricks.sdk.WorkspaceClient()` to use for the connection. If not
80+
provided, a new client will be created.
81+
82+
Returns
83+
-------
84+
Chat
85+
A chat object that retains the state of the conversation.
86+
"""
87+
if model is None:
88+
model = log_model_default("databricks-dbrx-instruct")
89+
90+
return Chat(
91+
provider=DatabricksProvider(
92+
model=model,
93+
workspace_client=workspace_client,
94+
),
95+
turns=normalize_turns(
96+
turns or [],
97+
system_prompt,
98+
),
99+
)
100+
101+
102+
class DatabricksProvider(OpenAIProvider):
103+
def __init__(
104+
self,
105+
*,
106+
model: str,
107+
workspace_client: Optional["WorkspaceClient"] = None,
108+
):
109+
try:
110+
from databricks.sdk import WorkspaceClient
111+
except ImportError:
112+
raise ImportError(
113+
"`ChatDatabricks()` requires the `databricks-sdk` package. "
114+
"Install it with `pip install databricks-sdk[openai]`."
115+
)
116+
117+
try:
118+
import httpx
119+
from openai import AsyncOpenAI
120+
except ImportError:
121+
raise ImportError(
122+
"`ChatDatabricks()` requires the `openai` package. "
123+
"Install it with `pip install openai`."
124+
)
125+
126+
self._model = model
127+
self._seed = None
128+
129+
if workspace_client is None:
130+
workspace_client = WorkspaceClient()
131+
132+
client = workspace_client.serving_endpoints.get_open_ai_client()
133+
134+
self._client = client
135+
136+
        # The databricks sdk does not currently expose an async client, but we can
137+
# effectively mirror what .get_open_ai_client() does internally.
138+
        # Note also there is an open PR to add async support that does essentially
139+
# the same thing:
140+
# https://github.com/databricks/databricks-sdk-py/pull/851
141+
self._async_client = AsyncOpenAI(
142+
base_url=client.base_url,
143+
api_key="no-token", # A placeholder to pass validations, this will not be used
144+
http_client=httpx.AsyncClient(auth=client._client.auth),
145+
)

chatlas/_openai.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,8 @@ def _chat_perform_args(
325325
del kwargs_full["tools"]
326326

327327
if stream and "stream_options" not in kwargs_full:
328-
kwargs_full["stream_options"] = {"include_usage": True}
328+
if self.__class__.__name__ != "DatabricksProvider":
329+
kwargs_full["stream_options"] = {"include_usage": True}
329330

330331
return kwargs_full
331332

docs/_quarto.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ quartodoc:
8080
- ChatAuto
8181
- ChatAzureOpenAI
8282
- ChatBedrockAnthropic
83+
- ChatDatabricks
8384
- ChatGithub
8485
- ChatGoogle
8586
- ChatGroq

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ dev = [
5050
"google-genai>=1.2.0",
5151
"numpy>1.24.4",
5252
"tiktoken",
53+
"databricks-sdk",
5354
"snowflake-ml-python",
5455
# torch (a dependency of snowflake-ml-python) is not yet compatible with Python >3.11
5556
"torch;python_version<='3.11'",
@@ -73,6 +74,7 @@ docs = [
7374
# Provider extras ----
7475
anthropic = ["anthropic"]
7576
bedrock-anthropic = ["anthropic[bedrock]"]
77+
databricks = ["databricks-sdk[openai]"]
7678
github = ["openai"]
7779
google = ["google-genai"]
7880
groq = ["openai"]

tests/conftest.py

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import tempfile
22
from pathlib import Path
3-
from typing import Awaitable, Callable
3+
from typing import Callable
44

55
import pytest
66
from chatlas import (
@@ -14,6 +14,7 @@
1414
)
1515
from PIL import Image
1616
from pydantic import BaseModel
17+
from tenacity import retry, wait_exponential
1718

1819
ChatFun = Callable[..., Chat]
1920

@@ -34,36 +35,18 @@ class ArticleSummary(BaseModel):
3435
"""
3536

3637

37-
def retryassert(assert_func: Callable[..., None], retries=1):
38-
for _ in range(retries):
39-
try:
40-
return assert_func()
41-
except Exception:
42-
pass
43-
return assert_func()
44-
45-
46-
async def retryassert_async(assert_func: Callable[..., Awaitable[None]], retries=1):
47-
for _ in range(retries):
48-
try:
49-
return await assert_func()
50-
except Exception:
51-
pass
52-
return await assert_func()
53-
54-
5538
def assert_turns_system(chat_fun: ChatFun):
5639
system_prompt = "Return very minimal output, AND ONLY USE UPPERCASE."
5740

5841
chat = chat_fun(system_prompt=system_prompt)
5942
response = chat.chat("What is the name of Winnie the Pooh's human friend?")
6043
response_text = str(response)
6144
assert len(chat.get_turns()) == 2
62-
assert "CHRISTOPHER ROBIN" in response_text
45+
assert "CHRISTOPHER ROBIN" in response_text.upper()
6346

6447
chat = chat_fun(turns=[Turn("system", system_prompt)])
6548
response = chat.chat("What is the name of Winnie the Pooh's human friend?")
66-
assert "CHRISTOPHER ROBIN" in str(response)
49+
assert "CHRISTOPHER ROBIN" in str(response).upper()
6750
assert len(chat.get_turns()) == 2
6851

6952

@@ -267,3 +250,9 @@ def assert_pdf_local(chat_fun: ChatFun):
267250
"Two word answer only.",
268251
)
269252
assert "red delicious" in str(response).lower()
253+
254+
255+
retry_api_call = retry(
256+
wait=wait_exponential(min=1, max=60),
257+
reraise=True,
258+
)

tests/test_provider_anthropic.py

Lines changed: 16 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
assert_tools_simple_stream_content,
1414
assert_turns_existing,
1515
assert_turns_system,
16-
retryassert,
17-
retryassert_async,
16+
retry_api_call,
1817
)
1918

2019

@@ -50,53 +49,40 @@ def test_anthropic_respects_turns_interface():
5049
assert_turns_existing(chat_fun)
5150

5251

52+
@retry_api_call
5353
def test_anthropic_tool_variations():
5454
chat_fun = ChatAnthropic
55-
56-
def run_simpleassert():
57-
assert_tools_simple(chat_fun)
58-
59-
retryassert(run_simpleassert, retries=5)
60-
55+
assert_tools_simple(chat_fun)
6156
assert_tools_simple_stream_content(chat_fun)
57+
assert_tools_sequential(chat_fun, total_calls=6)
6258

63-
def run_parallelassert():
64-
# For some reason, at the time of writing, Claude 3.7 doesn't
65-
# respond with multiple tools at once for this test (but it does)
66-
# answer the question correctly with sequential tools.
67-
def chat_fun2(**kwargs):
68-
return ChatAnthropic(model="claude-3-5-sonnet-latest", **kwargs)
69-
70-
assert_tools_parallel(chat_fun2)
7159

72-
retryassert(run_parallelassert, retries=5)
60+
@retry_api_call
61+
def test_anthropic_tool_variations_parallel():
62+
# For some reason, at the time of writing, Claude 3.7 doesn't
63+
# respond with multiple tools at once for this test (but it does)
64+
# answer the question correctly with sequential tools.
65+
def chat_fun(**kwargs):
66+
return ChatAnthropic(model="claude-3-5-sonnet-latest", **kwargs)
7367

74-
# Fails occassionally returning "" instead of Susan
75-
def run_sequentialassert():
76-
assert_tools_sequential(chat_fun, total_calls=6)
77-
78-
retryassert(run_sequentialassert, retries=5)
68+
assert_tools_parallel(chat_fun)
7969

8070

8171
@pytest.mark.asyncio
72+
@retry_api_call
8273
async def test_anthropic_tool_variations_async():
83-
async def run_asyncassert():
84-
await assert_tools_async(ChatAnthropic)
85-
86-
await retryassert_async(run_asyncassert, retries=5)
74+
await assert_tools_async(ChatAnthropic)
8775

8876

8977
def test_data_extraction():
9078
assert_data_extraction(ChatAnthropic)
9179

9280

81+
@retry_api_call
9382
def test_anthropic_images():
9483
chat_fun = ChatAnthropic
9584

96-
def run_inlineassert():
97-
assert_images_inline(chat_fun)
98-
99-
retryassert(run_inlineassert, retries=3)
85+
assert_images_inline(chat_fun)
10086
assert_images_remote_error(chat_fun)
10187

10288

0 commit comments

Comments
 (0)