review suggestions

hanouticelina · hanouticelina · commit adfc2548918e · 2025-07-02T17:57:10.000+02:00
diff --git a/docs/models/huggingface.md b/docs/models/huggingface.md
@@ -57,7 +57,7 @@ from pydantic_ai import Agent
 from pydantic_ai.models.huggingface import HuggingFaceModel
 from pydantic_ai.providers.huggingface import HuggingFaceProvider
 
-model = HuggingFaceModel('Qwen/Qwen3-235B-A22B', provider=HuggingFaceProvider(api_key='hf_token', provider='nebius'))
+model = HuggingFaceModel('Qwen/Qwen3-235B-A22B', provider=HuggingFaceProvider(api_key='hf_token', provider_name='nebius'))
 agent = Agent(model)
 ...
 ```
diff --git a/pydantic_ai_slim/pydantic_ai/providers/huggingface.py b/pydantic_ai_slim/pydantic_ai/providers/huggingface.py
@@ -1,6 +1,7 @@
 from __future__ import annotations as _annotations
 
 import os
+from typing import overload
 
 from httpx import AsyncClient
 
@@ -32,13 +33,26 @@ def base_url(self) -> str:
     def client(self) -> AsyncInferenceClient:
         return self._client
 
+    @overload
+    def __init__(self, *, base_url: str, api_key: str | None = None) -> None: ...
+    @overload
+    def __init__(self, *, provider_name: str, api_key: str | None = None) -> None: ...
+    @overload
+    def __init__(self, *, hf_client: AsyncInferenceClient, api_key: str | None = None) -> None: ...
+    @overload
+    def __init__(self, *, hf_client: AsyncInferenceClient, base_url: str, api_key: str | None = None) -> None: ...
+    @overload
+    def __init__(self, *, hf_client: AsyncInferenceClient, provider_name: str, api_key: str | None = None) -> None: ...
+    @overload
+    def __init__(self, *, api_key: str | None = None) -> None: ...
+
     def __init__(
         self,
         base_url: str | None = None,
         api_key: str | None = None,
         hf_client: AsyncInferenceClient | None = None,
         http_client: AsyncClient | None = None,
-        provider: str | None = None,
+        provider_name: str | None = None,
     ) -> None:
         """Create a new Hugging Face provider.
 
@@ -50,9 +64,9 @@ def __init__(
                 [`AsyncInferenceClient`](https://huggingface.co/docs/huggingface_hub/v0.29.3/en/package_reference/inference_client#huggingface_hub.AsyncInferenceClient)
                 client to use. If not provided, a new instance will be created.
             http_client: (currently ignored) An existing `httpx.AsyncClient` to use for making HTTP requests.
-            provider : Name of the provider to use for inference. available providers can be found in the [HF Inference Providers documentation](https://huggingface.co/docs/inference-providers/index#partners).
+            provider_name: Name of the provider to use for inference. Available providers can be found in the [HF Inference Providers documentation](https://huggingface.co/docs/inference-providers/index#partners).
                 defaults to "auto", which will select the first available provider for the model, the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
-                If `base_url` is passed, then `provider` is not used.
+                If `base_url` is passed, then `provider_name` is not used.
         """
         api_key = api_key or os.environ.get('HF_TOKEN')
 
@@ -63,12 +77,12 @@ def __init__(
             )
 
         if http_client is not None:
-            raise ValueError('`http_client` is ignored for HuggingFace provider, please use `hf_client` instead')
+            raise ValueError('`http_client` is ignored for HuggingFace provider, please use `hf_client` instead.')
 
-        if base_url is not None and provider is not None:
-            raise ValueError('Cannot provide both `base_url` and `provider`')
+        if base_url is not None and provider_name is not None:
+            raise ValueError('Cannot provide both `base_url` and `provider_name`.')
 
         if hf_client is None:
-            self._client = AsyncInferenceClient(api_key=api_key, provider=provider, base_url=base_url)  # type: ignore
+            self._client = AsyncInferenceClient(api_key=api_key, provider=provider_name, base_url=base_url)  # type: ignore
         else:
             self._client = hf_client
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -294,7 +294,7 @@ def openrouter_api_key() -> str:
 
 @pytest.fixture(scope='session')
 def huggingface_api_key() -> str:
-    return os.getenv('HF_TOKEN', 'hf_token') or os.getenv('HUGGINGFACE_API_KEY', 'hf_token')
+    return os.getenv('HF_TOKEN', 'hf_token')
 
 
 @pytest.fixture(scope='session')
@@ -428,7 +428,7 @@ def model(
 
             return HuggingFaceModel(
                 'Qwen/Qwen2.5-72B-Instruct',
-                provider=HuggingFaceProvider(provider='nebius', api_key=huggingface_api_key),
+                provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key),
             )
         else:
             raise ValueError(f'Unknown model: {request.param}')
diff --git a/tests/models/cassettes/test_huggingface/test_hf_model_instructions.yaml b/tests/models/cassettes/test_huggingface/test_hf_model_instructions.yaml
@@ -1,4 +1,66 @@
 interactions:
+- request:
+    body: null
+    headers:
+      accept:
+      - '*/*'
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+    method: GET
+    uri: https://huggingface.co/api/models/Qwen/Qwen2.5-72B-Instruct?expand=inferenceProviderMapping
+  response:
+    headers:
+      access-control-allow-origin:
+      - https://huggingface.co
+      access-control-expose-headers:
+      - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+      connection:
+      - keep-alive
+      content-length:
+      - '701'
+      content-type:
+      - application/json; charset=utf-8
+      cross-origin-opener-policy:
+      - same-origin
+      etag:
+      - W/"2bd-diYmxjldwbIbFgWNRPBqJ3SEIak"
+      referrer-policy:
+      - strict-origin-when-cross-origin
+      vary:
+      - Origin
+    parsed_body:
+      _id: 66e81cefd1b1391042d0e47e
+      id: Qwen/Qwen2.5-72B-Instruct
+      inferenceProviderMapping:
+        featherless-ai:
+          providerId: Qwen/Qwen2.5-72B-Instruct
+          status: live
+          task: conversational
+        fireworks-ai:
+          providerId: accounts/fireworks/models/qwen2p5-72b-instruct
+          status: live
+          task: conversational
+        hyperbolic:
+          providerId: Qwen/Qwen2.5-72B-Instruct
+          status: live
+          task: conversational
+        nebius:
+          providerId: Qwen/Qwen2.5-72B-Instruct-fast
+          status: live
+          task: conversational
+        novita:
+          providerId: qwen/qwen-2.5-72b-instruct
+          status: live
+          task: conversational
+        together:
+          providerId: Qwen/Qwen2.5-72B-Instruct-Turbo
+          status: live
+          task: conversational
+    status:
+      code: 200
+      message: OK
 - request:
     body: null
     headers: {}
@@ -40,8 +102,8 @@ interactions:
           role: assistant
           tool_calls: []
         stop_reason: null
-      created: 1749475551
-      id: chatcmpl-6fa46f85f4f04beda9c936d5996b22a8
+      created: 1751470757
+      id: chatcmpl-b3936940372c481b8d886e596dc75524
       model: Qwen/Qwen2.5-72B-Instruct-fast
       object: chat.completion
       prompt_logprobs: null
diff --git a/tests/models/test_huggingface.py b/tests/models/test_huggingface.py
@@ -125,7 +125,8 @@ async def test_simple_completion(allow_model_requests: None):
     c = completion_message(ChatCompletionInputMessage(content='world', role='assistant'))  # type:ignore
     mock_client = MockHuggingFace.create_mock(c)
     model = HuggingFaceModel(
-        'Qwen/Qwen2.5-72B-Instruct', provider=HuggingFaceProvider(provider='nebius', hf_client=mock_client, api_key='x')
+        'Qwen/Qwen2.5-72B-Instruct',
+        provider=HuggingFaceProvider(provider_name='nebius', hf_client=mock_client, api_key='x'),
     )
     agent = Agent(model)
 
@@ -148,7 +149,8 @@ async def test_request_simple_usage(allow_model_requests: None):
     c = completion_message(ChatCompletionInputMessage(content='world', role='assistant'))  # type:ignore
     mock_client = MockHuggingFace.create_mock(c)
     model = HuggingFaceModel(
-        'Qwen/Qwen2.5-72B-Instruct', provider=HuggingFaceProvider(provider='nebius', hf_client=mock_client, api_key='x')
+        'Qwen/Qwen2.5-72B-Instruct',
+        provider=HuggingFaceProvider(provider_name='nebius', hf_client=mock_client, api_key='x'),
     )
     agent = Agent(model)
 
@@ -181,7 +183,8 @@ async def test_request_structured_response(allow_model_requests: None):
 
     mock_client = MockHuggingFace.create_mock(c)
     model = HuggingFaceModel(
-        'Qwen/Qwen2.5-72B-Instruct', provider=HuggingFaceProvider(provider='nebius', hf_client=mock_client, api_key='x')
+        'Qwen/Qwen2.5-72B-Instruct',
+        provider=HuggingFaceProvider(provider_name='nebius', hf_client=mock_client, api_key='x'),
     )
     agent = Agent(model, output_type=list[int])
 
@@ -652,7 +655,7 @@ def test_model_status_error(allow_model_requests: None) -> None:
 @pytest.mark.vcr()
 async def test_request_simple_success_with_vcr(allow_model_requests: None, huggingface_api_key: str):
     m = HuggingFaceModel(
-        'Qwen/Qwen2.5-72B-Instruct', provider=HuggingFaceProvider(provider='nebius', api_key=huggingface_api_key)
+        'Qwen/Qwen2.5-72B-Instruct', provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key)
     )
     agent = Agent(m)
     result = await agent.run('hello')
@@ -664,7 +667,7 @@ async def test_request_simple_success_with_vcr(allow_model_requests: None, huggi
 @pytest.mark.vcr()
 async def test_hf_model_instructions(allow_model_requests: None, huggingface_api_key: str):
     m = HuggingFaceModel(
-        'Qwen/Qwen2.5-72B-Instruct', provider=HuggingFaceProvider(provider='nebius', api_key=huggingface_api_key)
+        'Qwen/Qwen2.5-72B-Instruct', provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key)
     )
 
     def simple_instructions(ctx: RunContext):
@@ -684,7 +687,7 @@ def simple_instructions(ctx: RunContext):
                 usage=Usage(requests=1, request_tokens=26, response_tokens=2, total_tokens=28),
                 model_name='Qwen/Qwen2.5-72B-Instruct-fast',
                 timestamp=IsDatetime(),
-                vendor_id='chatcmpl-6fa46f85f4f04beda9c936d5996b22a8',
+                vendor_id='chatcmpl-b3936940372c481b8d886e596dc75524',
             ),
         ]
     )
diff --git a/tests/providers/test_huggingface.py b/tests/providers/test_huggingface.py
@@ -44,7 +44,7 @@ def test_huggingface_provider_pass_http_client() -> None:
         ValueError,
         match=re.escape('`http_client` is ignored for HuggingFace provider, please use `hf_client` instead'),
     ):
-        HuggingFaceProvider(http_client=http_client, api_key='api-key')
+        HuggingFaceProvider(http_client=http_client, api_key='api-key')  # type: ignore
 
 
 def test_huggingface_provider_pass_hf_client() -> None: